import msaf
import pandas as pd
# Display DataFrame values with 4 decimals. The fully qualified option key is
# used because the bare 'precision' pattern is ambiguous on pandas versions that
# also define 'styler.format.precision', where set_option raises OptionError.
pd.set_option('display.precision', 4)
import numpy as np
import musicntd.scripts.overall_scripts as scr
import musicntd.data_manipulation as dm

Segmentation results of baseline methods

This notebook presents the segmentation results of the baseline methods, computed with MSAF.

We restricted the baseline to three algorithms:

CNMF [1],
Foote's novelty [2],
Spectral Clustering [3].

# Identifiers of the baseline boundary-detection algorithms to evaluate;
# each one is passed to MSAF as `boundaries_id`.
desired_algos = ["cnmf", "foote", "scluster"]

# Folder holding the audio files of the dataset.
folder = "C:\\Users\\amarmore\\Desktop\\Audio samples\\RWC Pop\\Entire RWC"

# Annotation set used as reference; it also names the annotation sub-folder.
annotations_type = "MIREX10"
annotations_folder = f"C:\\Users\\amarmore\\Desktop\\Audio samples\\RWC Pop\\annotations\\{annotations_type}"

Below is the code to compute these scores.

def parse_all_algos(song_path, references_segments, bars):
    """Segment one song with every baseline algorithm and evaluate the results.

    For each algorithm listed in the module-level ``desired_algos``, boundaries
    are estimated with ``msaf.process`` and converted to segments. These
    segments are evaluated against the reference twice: first as estimated,
    then after being passed through ``dm.align_segments_on_bars``. Every
    evaluation is made with window lengths of 0.5 and 3.

    Args:
        song_path: Path of the audio file, handed to ``msaf.process``.
        references_segments: Reference segments the estimations are compared to.
        bars: Bars of the song, used to align the estimated segments.

    Returns:
        A tuple of four lists: scores at 0.5, scores at 3, rates at 0.5 and
        rates at 3. Each list holds two consecutive entries per algorithm
        (original segments, then bar-aligned segments), following the order
        of ``desired_algos``.
    """
    scores_half_second = []
    scores_three_seconds = []
    rates_half_second = []
    rates_three_seconds = []

    for algo in desired_algos:
        # Only the boundaries are needed; the second returned value is dropped.
        boundaries, _ = msaf.process(song_path, boundaries_id=algo)
        estimated = np.array(dm.frontiers_to_segments(boundaries))

        # First pass evaluates the raw estimation, second pass the aligned one.
        for align_on_bars in (False, True):
            if align_on_bars:
                candidate = dm.align_segments_on_bars(estimated, bars)
            else:
                candidate = estimated

            scores_half_second.append(dm.compute_score_of_segmentation(
                references_segments, candidate, window_length=0.5))
            scores_three_seconds.append(dm.compute_score_of_segmentation(
                references_segments, candidate, window_length=3))

            rates_half_second.append(dm.compute_rates_of_segmentation(
                references_segments, candidate, window_length=0.5))
            rates_three_seconds.append(dm.compute_rates_of_segmentation(
                references_segments, candidate, window_length=3))

    return (scores_half_second, scores_three_seconds,
            rates_half_second, rates_three_seconds)

# Script which parses all songs of RWC, computes their frontiers with every
# baseline algorithm, and accumulates the resulting scores and rates, one entry
# per song.
zero_point_five_results = []
three_seconds_results = []
five_rates_results = []
three_rates_results = []

# Folder handed to scr.load_or_save_bars along with each song path.
persisted_path = "C:\\Users\\amarmore\\Desktop\\data_persisted\\"
paths = scr.load_RWC_dataset(folder, annotations_type = annotations_type)

for song_and_annotations in paths:
    # First element: audio file name; second element: annotation file name.
    song_file = song_and_annotations[0]
    song_path = folder + "\\" + song_file
    print(song_file)

    annotation_file = song_and_annotations[1]
    annot_path = f"{annotations_folder}\\{annotation_file}"
    annotations = dm.get_segmentation_from_txt(annot_path, annotations_type)
    # Keep only the first two columns of each annotation row.
    references_segments = np.array(annotations)[:, :2]

    bars = scr.load_or_save_bars(persisted_path, song_path)
    this_zero, this_three, five_rates, three_rates = parse_all_algos(
        song_path, references_segments, bars)

    # Scores first, then rates, each for the 0.5 and 3 window lengths.
    zero_point_five_results.append(this_zero)
    three_seconds_results.append(this_three)
    five_rates_results.append(five_rates)
    three_rates_results.append(three_rates)

Finally, we display the scores computed for the baseline methods in a readable table.

# Per-song results stacked into arrays; indexed below as [song, row, metric].
zerofive = np.array(zero_point_five_results)
three = np.array(three_seconds_results)

all_algos = list(desired_algos)
params = ['Original', 'Aligned on downbeats']

# Row labels: one row per (algorithm, variant) pair, algorithm-major, which is
# the order in which parse_all_algos appends its results.
row_labels = [(algo, variant) for algo in all_algos for variant in params]
line = [algo for algo, _ in row_labels]
subline = [variant for _, variant in row_labels]
nested_lines = [np.array(line), np.array(subline)]

# Column labels: the three metrics, repeated for each window length.
metrics = ['Precision', 'Recall', 'F measure']
col = [np.array(['0.5 seconds'] * 3 + ['3 seconds'] * 3),
    np.array(metrics * 2)]

# Each cell is the mean over all songs of one metric for one row.
arr = [
    [np.mean(scores[:, row, metric])
     for scores in (zerofive, three)
     for metric in range(3)]
    for row in range(len(line))
]

pd.DataFrame(np.array(arr), index=nested_lines, columns=col)

References

[1] Nieto, O., & Jehan, T. (2013, May). Convex non-negative matrix factorization for automatic music structure identification. In 2013 IEEE International Conference on Acoustics, Speech and Signal Processing (pp. 236-240). IEEE.

[2] Foote, J. (2000, July). Automatic audio segmentation using a measure of audio novelty. In 2000 IEEE International Conference on Multimedia and Expo. ICME2000. Proceedings. Latest Advances in the Fast Changing World of Multimedia (Cat. No. 00TH8532) (Vol. 1, pp. 452-455). IEEE.

[3] McFee, B., & Ellis, D. (2014). Analyzing Song Structure with Spectral Clustering. In ISMIR (pp. 405-410).

		0.5 seconds			3 seconds
		Precision	Recall	F measure	Precision	Recall	F measure
cnmf	Original	0.2284	0.2146	0.2152	0.4676	0.4517	0.4469
cnmf	Aligned on downbeats	0.3157	0.2811	0.2881	0.5068	0.4537	0.4653
foote	Original	0.2965	0.2230	0.2514	0.6389	0.4859	0.5449
foote	Aligned on downbeats	0.4203	0.2995	0.3448	0.6706	0.4766	0.5501
scluster	Original	0.3123	0.3045	0.2944	0.6065	0.6084	0.5812
scluster	Aligned on downbeats	0.4921	0.4503	0.4501	0.6554	0.6056	0.6030